/**
 * 
 */
package br.coppe.coimbra.document.doc.type;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import br.coppe.coimbra.document.doc.DocIF;
import br.coppe.coimbra.processing.text.Str;

/**
 * Document type that extracts plain text from a Word file through Apache
 * POI's {@link WordExtractor}.
 *
 * @author phillipe
 */
public class Doc extends DocumentType implements DocIF {

	/**
	 * Creates a reader for the given file.
	 *
	 * @param file the file to read; passed unchanged to the superclass
	 *             constructor
	 */
	public Doc(File file) {

		super(file);
	}

	/**
	 * Extracts the text of the underlying file and passes it through
	 * {@link Str#cleanExtraSpaces(String)}.
	 *
	 * <p>I/O failures are not propagated: they are reported on
	 * {@code System.err} (message plus stack trace) and whatever text was
	 * obtained so far is returned, which is the empty string when the file
	 * could not be opened or parsed.
	 *
	 * @return the extracted and cleaned text, or an empty string when the file
	 *         cannot be found or read
	 */
	@Override
	public String read() {

		String text = "";

		// try-with-resources guarantees the file handle is released on every
		// exit path (including unchecked exceptions thrown by the parser)
		// instead of depending on the library to close the stream for us.
		try (FileInputStream fis = new FileInputStream(super.file)) {

			POIFSFileSystem fs = new POIFSFileSystem(fis);

			WordExtractor we = new WordExtractor(fs);

			text = we.getText();
			text = Str.cleanExtraSpaces(text);

		} catch (FileNotFoundException fnfe) {

			System.err.println("Arquivo não encontrado.");
			System.err.println(fnfe.getMessage());
			fnfe.printStackTrace();

		} catch (IOException ioe) {

			System.err.println("Erro de leitura do arquivo.");
			// println (not print) so the message does not run into the first
			// line of the stack trace, matching the branch above.
			System.err.println(ioe.getMessage());
			ioe.printStackTrace();
		}
		return text;
	}

}
